# Redirect printed output (type = "output", i.e. not messages/warnings) to the
# log file, overwriting any existing log; stays active until sink() is called.
sink("traffic.ratings.comparison-psrm-log.txt", append = FALSE, type = "output")

##
#Comparing Several Measures of News Domain Traffic (Appendix A)
##
load('political.comparison-psrm.RData')
load('top500.comparison-psrm.RData')

# Correlation (cor() default method) between the column `measure` and the
# column named `<measure>.comscore` in `sites`, using only rows where both
# values are present.
#   sites:   data frame holding both columns
#   measure: column name as a single string
# Returns a single numeric correlation.
comscore.cor <- function(sites, measure) {
  cor(sites[[measure]], sites[[paste0(measure, '.comscore')]], use = 'complete.obs')
}

#Top 50 (All Sites)
# head() takes at most the first 50 rows and, unlike [1:50, ], never pads a
# shorter frame with all-NA rows.
# NOTE(review): "top" assumes the frame is already ordered by traffic -- confirm.
top.overall <- head(top500.comparison, 50)
traffic.share.top.all <- comscore.cor(top.overall, 'view.share')
visitor.share.top.all <- comscore.cor(top.overall, 'visitor.share')
visits.top.all <- comscore.cor(top.overall, 'views.per.active.panelist')

#Top 500 (All Sites)
traffic.share.all <- comscore.cor(top500.comparison, 'view.share')
visitor.share.all <- comscore.cor(top500.comparison, 'visitor.share')
visits.all <- comscore.cor(top500.comparison, 'views.per.active.panelist')

#Top 50 (Political Site List)
top.political <- head(political.comparison, 50)
traffic.share.top.political <- comscore.cor(top.political, 'view.share')
visitor.share.top.political <- comscore.cor(top.political, 'visitor.share')
visits.top.political <- comscore.cor(top.political, 'views.per.active.panelist')

#Full Political Site List
traffic.share.political <- comscore.cor(political.comparison, 'view.share')
visitor.share.political <- comscore.cor(political.comparison, 'visitor.share')
visits.political <- comscore.cor(political.comparison, 'views.per.active.panelist')

# One row per sample, one column per traffic measure. Rows are labelled so the
# printed table is readable without cross-referencing the code.
traffic.comparison.frame <- data.frame(
  'Visitor Share' = c(visitor.share.top.all, visitor.share.all,
                      visitor.share.top.political, visitor.share.political),
  'Traffic Share' = c(traffic.share.top.all, traffic.share.all,
                      traffic.share.top.political, traffic.share.political),
  'Visits per Panelist' = c(visits.top.all, visits.all,
                            visits.top.political, visits.political),
  row.names = c('Top 50 (All Sites)', 'Top 500 (All Sites)',
                'Top 50 (Political Site List)', 'Full Political Site List'),
  check.names = FALSE  # keep the column headings with spaces as written
)

#Table A1
traffic.comparison.frame

##
# Appendix C: comparing several measures of news domain partisanship
##
load("site.comparison-psrm.RData")

# Keep domains with more than 50 unique visitors; the high-traffic subset
# additionally requires more than 300.
site.comparison <- subset(site.comparison, unique.visitors > 50)
site.comparison.high.traffic <- subset(site.comparison, unique.visitors > 300)

# Pearson correlation between two columns of the high-traffic subset, computed
# over the rows where both values are present.
high.traffic.cor <- function(first, second) {
  cor(
    x = site.comparison.high.traffic[[first]],
    y = site.comparison.high.traffic[[second]],
    use = "pairwise.complete.obs",
    method = "pearson"
  )
}

# Values reported in Table C1 (high-traffic sites only).
# Audience-based measure (Republican partisanship) against the Bakshy,
# Flaxman and content-analysis ratings.
audience.bakshy <- high.traffic.cor("republican.partisanship", "avg_align")
audience.flaxman <- high.traffic.cor("republican.partisanship", "flaxman.rating")
audience.content <- high.traffic.cor("republican.partisanship", "article.rating")

# Content-based measure (average article rating) against the Bakshy, Flaxman
# and audience-based ratings.
content.bakshy <- high.traffic.cor("article.rating", "avg_align")
content.flaxman <- high.traffic.cor("article.rating", "flaxman.rating")
content.audience <- high.traffic.cor("article.rating", "republican.partisanship")

# Table C1, first column
audience.content
audience.flaxman
audience.bakshy
# Table C1, second column
content.audience
content.flaxman
content.bakshy


#Figure C1 - Comparing Audience and Content Domain Measures (All sites with more than 50 visitors)
# Scatter of the audience-based measure (x) against the content-based measure
# (y) with a least-squares fit line, written to figure-c1.pdf.
pdf(file = 'figure-c1.pdf', height = 4, width = 5)
# If drawing fails, close the PDF device and any active output sink before
# re-raising, so an aborted run does not leave either redirection open in the
# session. On success the device and sink are closed by the lines below.
invisible(tryCatch({
  plot(x = site.comparison$republican.partisanship,
       y = site.comparison$article.rating,
       xlab = "% Republican Visit Share (Audience)",
       ylab = "Average Article Rating (Content)",
       pch = 16, col = 'gray', las = 1,
       main = "Comparison of Domain Rating Methods")
  abline(reg = lm(article.rating ~ republican.partisanship, data = site.comparison),
         lwd = 2)
}, error = function(e) {
  dev.off()
  if (sink.number() > 0) sink()
  stop(e)
}))
dev.off()
sink()